package com.chuangxin.data.yuehengji;

import com.chuangxin.data.core.WebPageFetcher;
import com.chuangxin.data.core.io.DataOutput;
import com.chuangxin.data.core.io.FileWriter;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.File;

/**
 * Page fetcher for www.yuehengji.com that starts at a fixed article page and
 * follows each page's "next" link.
 *
 * <p>For every document handed to {@link #processDoc(Document, String)} the
 * fetcher extracts the page title and the HTML of {@code div.info_content},
 * and registers the link found in the second {@code div.info_next} element
 * through {@code putURL} (presumably so the base class visits it next).
 *
 * Created by Dawnwords on 2015/2/1.
 */
public class SWGWSMFetcher extends WebPageFetcher {
    /**
     * Site root that is prepended to the site-relative paths used by this class.
     * The name is a misspelling of "BASE_URL"; it is kept unchanged because the
     * constant is public and may be referenced elsewhere.
     */
    public static final String BASE_ULR = "http://www.yuehengji.com";

    /** Site-relative path of the first page to fetch. */
    private static final String START = "/pk/html/rtiaomi/54.html";

    /** Output location used by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_OUTPUT =
            "E:" + File.separator + "yuehengji" + File.separator + "swgwsm";

    /**
     * Creates the fetcher and registers the start page.
     *
     * @param dataOutput sink passed through to the {@link WebPageFetcher} constructor
     */
    public SWGWSMFetcher(DataOutput dataOutput) {
        super(dataOutput);
        putURL(BASE_ULR + START);
    }

    /**
     * Extracts title and content from one page and registers the next page, if any.
     *
     * @param doc parsed page
     * @param url address of the page; not used by this implementation
     * @return the document title paired with the HTML of {@code div.info_content}
     */
    @Override
    protected TitleContent processDoc(Document doc, String url) {
        // The second "div.info_next" element holds the link to the following page.
        // Elements.attr yields "" when there is no anchor or the anchor has no href,
        // so a single emptiness check covers both cases and prevents registering
        // the bare site root as a page.
        String nextHref = doc.select("div.info_next").eq(1).select("a").attr("href").trim();
        if (!nextHref.isEmpty()) {
            putURL(BASE_ULR + nextHref);
        }

        String title = doc.title();
        String content = doc.select("div.info_content").html();
        return new TitleContent(title, content);
    }

    /**
     * Command-line entry point.
     *
     * @param args optional; {@code args[0]} is the output location handed to
     *             {@link FileWriter}. When absent, {@code E:\yuehengji\swgwsm}
     *             (built with the platform separator) is used.
     */
    public static void main(String[] args) {
        String outputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT;
        new SWGWSMFetcher(new FileWriter(outputPath)).execute();
    }

}
